Abstract
This notebook is a template for evaluating your forecaster against submissions from COVIDhub. After inputting a set of parameters (forecasters, COVID signal, etc), the templates yields a comprehensive report on the predictions of COVID forecasters as well as their performance compared to the ground truth.
Package installation
Every week, forecasters submit their predictions to COVID-19 ForecastHub. An AWS bucket that contains the estimates of a handful of signals (e.g., COVID death, cases, hospitalization, etc). Furthermore, the AWS server stores an array of evaluation metrics of these forecasts (e.g., Absolute Error, Weighted Interval Score). Alternatively, the data can be retrieved from covidcast and covideval APIs.
aheads <- as.numeric(params$weeks)
# if(params$outcome=="cases") {
# #This is for the cases:
# url_case <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_state_cases.rds"
# download.file(url_case, "eval_cases.RDS") # download to disk
# scores <- readRDS(paste0(here(), "/eval_cases.RDS"))
# scores <- subset(scores, forecaster %in% params$forecasters)
# }
# if(params$outcome=="deaths"){
url_deaths <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_state_deaths.rds"
download.file(url_deaths, "eval_deaths.RDS") # download to disk
scores <- readRDS(paste0(here(), "/eval_deaths.RDS"))
scores <- subset(scores, forecaster %in% params$forecasters)
# }
primary_forecaster <- params$primary_forecaster
our_pred_dates <-
scores %>%
filter(forecaster == "CMU-TimeSeries")
our_pred_dates <- unique(our_pred_dates$forecast_date)
n_dates <- length(our_pred_dates)
# n_dates - 4 is often the most recent date with ground truth for 4 weeks ahead
# dates spaced out 2 weeks ahead to see different behaviors
forecast_dates <- our_pred_dates[n_dates- 2 *5:2]
scores$forecast_date <-
if_else(scores$forecaster %in% c("Karlen-pypm", "CU-select"), scores$forecast_date + 1, scores$forecast_date)
scores <- subset(scores, forecast_date %in% forecast_dates & ahead <= aheads)
results <- intersect_averagers(scores, c("forecaster"), c("forecast_date", "geo_value")) %>%
select(c("ahead", "geo_value", "forecaster","forecast_date", "data_source", "signal","target_end_date","incidence_period","actual","wis","ae","cov_80"))
results %>%
group_by(forecast_date) %>%
summarise(n_distinct(geo_value))
To prompt the flexibility to replicate the report, the data used in this report can be easily downloaded as a CSV file. By doing so, the user can generate customized plots or even include their own forecaster.
results %>%
download_this(
output_name = "results",
output_extension = ".csv",
button_label = "Download Predictions Evaluation",
button_type = "success",
has_icon = TRUE,
csv2 = FALSE,
icon = "fa fa-save"
)
NOTE: Results are based on the following numbers of common locations
By Weeks Ahead
weeks.label = c("1 week ahead", "2 weeks ahead", "3 weeks ahead", "4 weeks ahead")
names(weeks.label) = c(1, 2, 3, 4)
subtitle = sprintf("Forecasts made over %s to %s",
format(min(forecast_dates), "%B %d, %Y"),
format(max(forecast_dates), "%B %d, %Y"))
plot_ae <-
plot_canonical(results,
x = "ahead",
y = "ae",
aggr = mean) +
labs(title = subtitle, x= "Weeks Ahead", y = "Mean AE",color='Forecasters') +
# geom_line(aes(linetype=forecaster, color=forecaster)) +
geom_point(aes(color=forecaster, text=sprintf("Weeks Ahead: %s<br>Average Error: %s <br>Forecaster: %s",
ahead,
round(ae, digits=2),
color)),
alpha = 0.05) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_y_log10()
if (params$colorblind_palette) {
plot_ae <- plot_ae +
scale_color_viridis_d()
}
ggplotly(plot_ae, tooltip="text", width=1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
By Forecast Dates
plot_wis <-
plot_canonical(results,
x = "forecast_date",
y = "wis",
aggr = mean,
grp_vars = c("forecaster","ahead"),
facet_rows = "ahead") +
labs(title = subtitle,
x = "Forecast Dates",
y = "Mean WIS",
color = "Forecasters") +
geom_point(aes(text=sprintf("Forecast Date: %s<br>Mean WIS: %s <br>Forecaster: %s",
forecast_date,
round(wis, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead=weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
scale_y_log10()
if (params$colorblind_palette) {
plot_wis <- plot_wis +
scale_color_viridis_d()
}
ggplotly(plot_wis, tooltip="text", height=800, width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
By Forecast Dates
plot_cov80 <-
plot_canonical(results,
x = "forecast_date",
y = "cov_80",
aggr = mean,
grp_vars = c("forecaster","ahead"),
facet_rows = "ahead") +
labs(title = subtitle, x= "Forecast date", y = "Mean Coverage 80", color='Forecasters') +
geom_point(aes(text = sprintf("Forecast Date: %s<br>Coverage: %s <br>Forecaster: %s",
forecast_date,
round(cov_80, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = .8), )
if (params$colorblind_palette) {
plot_cov80 <- plot_cov80 +
scale_color_viridis_d()
}
ggplotly(plot_cov80, tooltip="text", height=800, width=1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
Relative to baseline; scale first then take the geometric mean, ignoring a few 0’s.
geom_mean <- function(x) prod(x)^(1/length(x))
#geom_mean <- exp(mean(log((x+1)/(y+1)))) #still need to figure this out
mean_wis <-
plot_canonical(results %>%
filter(wis > 0),
x = "ahead",
y = "wis",
aggr = geom_mean,
base_forecaster = "COVIDhub-baseline",
scale_before_aggr = TRUE) +
labs(title = subtitle,
x = "Weeks Ahead",
y = "Geometric mean relative WIS",
color = "Forecasters") +
geom_point(aes(text = sprintf("Weeks Ahead: %s<br>WIS: %s <br>Forecaster: %s",
ahead,
round(wis, digits = 2),
color)),
alpha = 0.05) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = 1))
if (params$colorblind_palette) {
mean_wis <- mean_wis +
scale_color_viridis_d()
}
ggplotly(mean_wis, tooltip="text", width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
mean_wis_forecast_date <-
plot_canonical(results %>%
filter(wis > 0),
x = "forecast_date",
y = "wis",
aggr = geom_mean,
facet_rows = "ahead",
grp_vars = c("forecaster", "ahead"),
base_forecaster = "COVIDhub-baseline",
scale_before_aggr = TRUE) +
theme(legend.position = "bottom") +
labs(title = subtitle,
x = "Forecast date",
y = "Geometric mean relative WIS",
color = "Forecasters") +
geom_point(aes(text = sprintf("Forecast Date: %s<br>WIS: %s <br>Forecaster: %s",
forecast_date,
round(wis, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = 1))
if (params$colorblind_palette) {
mean_wis_forecast_date <- mean_wis_forecast_date +
scale_color_viridis_d()
}
ggplotly(mean_wis_forecast_date, tooltip = "text", height=800, width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
mean score over forecast dates and aheads Note that the results are scaled by population.
library(sf)
maps <- results %>%
group_by(geo_value, forecaster) %>%
summarise(across(wis:cov_80, mean)) %>%
left_join(animalia::state_population, by = "geo_value") %>%
mutate(across(wis:cov_80, ~ .x / population * 1e5)) %>%
pivot_longer(wis:cov_80, names_to = "score") %>%
group_by(score) %>%
mutate(time_value = Sys.Date(),
r = max(value)) %>%
group_by(forecaster, .add = TRUE) %>%
group_split()
# for county prediction, set geo_type = "county"
maps <- purrr::map(maps,
~as.covidcast_signal(
.x, signal = .x$score[1],
data_source = .x$forecaster[1],
geo_type = "state"))
maps <- purrr::map(maps,
~plot(.x,
choro_col = scales::viridis_pal()(3),
range = c(0,.x$r[1])))
nfcasts <- length(unique(results$forecaster))
# original code
cowplot::plot_grid(plotlist = maps[1:nfcasts], ncol = 3)
cowplot::plot_grid(plotlist = maps[(nfcasts+1):(nfcasts*2)], ncol = 3)
cowplot::plot_grid(plotlist = maps[((nfcasts*2)+1):length(maps)], ncol = 3)
For county predictions, you might want to change the fig.height and fig.width chunk options here (in other notebooks we use fig.height = 120 and fig.width = 30).
options(timeout=300)
url4 <- "https://forecast-eval.s3.us-east-2.amazonaws.com/predictions_cards.rds"
download.file(url4, "predictions.RDS") # download to disk
state_predictions <- readRDS(paste0(here(), "/predictions.RDS"))
# Check how many aheads in the state_predictions data
# Check the forecast_end_date column
# for county prediction, set geo_type = "county"
state.label = c(state.name, "Washington D.C.", "Porto Rico")
names(state.label) = c(tolower(state.abb), "dc", "pr")
# trajectory plots for selected forecaster
pd <- evalcast:::setup_plot_trajectory(
state_predictions %>% filter(forecaster=="CMU-TimeSeries" & forecast_date %in% forecast_dates),
intervals = 0.8,
geo_type = "state",
start_day = min(forecast_dates) - 60)
g <- ggplot(pd$truth_df, mapping = aes(x = target_end_date))
# build the fan
g <- g + geom_ribbon(
data = pd$quantiles_df %>%
mutate(upper = round(upper, 2),
lower = round(lower, 2)),
mapping = aes(ymin = lower,
ymax = upper,
fill = forecaster,
group = interaction(forecaster, forecast_date)),
alpha = .1) +
scale_fill_viridis_d(begin=.15, end=.85)
# line layer
g <- g +
#geom_line(aes(y = .data$value.y), color = "#3182BD") + # corrected
geom_line(aes(y = value), size = .5) + # reported
geom_line(data = pd$points_df,
mapping = aes(y = value,
color = forecaster,
group = interaction(forecaster, forecast_date)),
size = .5) +
geom_point(aes(y = value), size = 1) + # reported gets dots
geom_point(data = pd$points_df %>%
mutate(value = round(value, 2)),
mapping = aes(y = value, color = forecaster),
size = 1) +
scale_color_viridis_d(begin=.15, end=.85)
states <- g +
facet_wrap(~geo_value,
scales = "free_y",
ncol = 3,
labeller = labeller(geo_value = state.label)) +
labs(x = "", y = "") +
theme_bw() +
theme(legend.position = "none", strip.background = element_rect(fill = "white"))
ggplotly(states, height=5000, width= 1000)